Run UMAP on CD4+ events which express a COMPASS subset. Repeat for CD8 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”
This time around, don’t sample the events.
Don’t stratify by groups, but rather color the sub-localization of the different markers.
Color by Cohort and Antigen (S1, S2, NCAP, VEMP, including DMSO)
Also color by
- Degree of functionality
- Cytokine - CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?
library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(uwot)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
library(ggpubr)
library(tidyselect)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200814
save_output <- TRUE
rerun_dimred <- FALSE
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))
crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
stims_for_compass_runs_rep),
.f = function(parent, currentStim) {
currentStimForFile <- gsub(" ", "_", currentStim)
crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
readRDS(crPath)
}) %>%
set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
## [1] "4_VEMP" "NOT4_VEMP" "8_VEMP" "4_Spike_1" "NOT4_Spike_1"
## [6] "8_Spike_1" "4_Spike_2" "NOT4_Spike_2" "8_Spike_2" "4_NCAP"
## [11] "NOT4_NCAP" "8_NCAP"
crList.no_healthy <- lapply(names(crList),
function(cr_name) {
cr <- crList[[cr_name]]
print(sprintf("Accessing %s", cr_name))
cr$data$meta <- cr$data$meta %>%
dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma)))) %>%
mutate(Cohort = factor(ifelse(Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized"))))
stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
cr$data$meta <- cr$data$meta[not_healthy_idx,] %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]
cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
cr
})
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList)
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")
gs <- load_gs(here::here("out/GatingSets/20200805_HAARVI_ICS_GatingSet_AllBatches"))
## loading R object...
## loading tree object...
## Done
pData(gs)$Batch <- ifelse(pData(gs)$`EXPERIMENT NAME` == "20200605_COVID_ICS-B3", 3, pData(gs)$Batch)
gs2 <- subset(gs, !(`SAMPLE ID` %in% c("37C", "BWT23", "116C", "BWT22")) &
!(`SAMPLE ID` == "551432" & STIM == "Spike 2"))
dput(gh_get_pop_paths(gs2))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S",
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/107a",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/154", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+")
# Add the CD4+ COMPASS boolean cytokine subset gates to the GatingSet
cd4_categories <- unique(do.call(rbind, lapply(CD4RunNames, function(runName) {
# Filter the subsets to those where the average mean_gamma is greater than the threshold (default in heatmap and this function is 0.01)
filteredSubsetIndicesCurrentRun <- which(apply(crList.no_healthy[[runName]]$fit$mean_gamma, 2, function(x) { mean(x, na.rm = TRUE) }) > 0.01) # names()
# TODO: Assuming here that mean_gamma colnames are in same order as categories rows. Should do formal check.
crList.no_healthy[[runName]]$data$categories[filteredSubsetIndicesCurrentRun,]
})))
# Remove the category with zero positive cytokines
cd4_categories <- cd4_categories[-which(cd4_categories[,"Counts"] == 0),]
knitr::kable(cd4_categories)
| IL2 | IL4/5/13 | IFNg | TNFa | IL17a | CD154 | CD107a | Counts |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 |
| 1 | 0 | 0 | 0 | 0 | 1 | 0 | 2 |
| 0 | 0 | 1 | 0 | 0 | 1 | 0 | 2 |
| 0 | 0 | 0 | 1 | 0 | 1 | 0 | 2 |
| 1 | 0 | 1 | 1 | 0 | 0 | 0 | 3 |
| 1 | 0 | 1 | 0 | 0 | 1 | 0 | 3 |
| 1 | 0 | 0 | 1 | 0 | 1 | 0 | 3 |
| 0 | 0 | 1 | 1 | 0 | 1 | 0 | 3 |
| 1 | 1 | 0 | 1 | 0 | 1 | 0 | 4 |
| 1 | 0 | 1 | 1 | 0 | 1 | 0 | 4 |
| 1 | 1 | 1 | 1 | 0 | 1 | 0 | 5 |
| 1 | 0 | 1 | 1 | 0 | 1 | 1 | 5 |
| 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| 0 | 0 | 1 | 1 | 0 | 0 | 0 | 2 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 0 | 1 | 0 | 0 | 1 | 0 | 0 | 2 |
| 1 | 1 | 1 | 0 | 0 | 0 | 0 | 3 |
mapMarkers <- c("IL2", "IL4/5/13", "IFNg", "TNFa", "IL17a", "CD154", "CD107a")
cd4NodeMarkerMap <- mapMarkers
# NodeMarkerMap names are gating tree paths
names(cd4NodeMarkerMap) <- paste0("4+", "/", c("IL2", "IL4513", "IFNG", "TNF", "IL17", "154", "107a"))
cd4_cats_mod <- as.data.frame(cd4_categories) %>%
dplyr::select(-Counts) %>%
mutate_all(~ recode(., "0" = "!", "1" = "")) %>%
dplyr::rename_at(all_of(cd4NodeMarkerMap), ~ names(cd4NodeMarkerMap))
cd4_booleanSubsets <- cd4_cats_mod %>%
rowwise() %>%
do(booleanSubset = paste(paste0(., colnames(cd4_cats_mod)), collapse="&")) %>%
ungroup() %>%
dplyr::pull(booleanSubset) %>%
unlist()
names(cd4_booleanSubsets) <- paste0("CD4_", gsub("4\\+\\/", "", gsub("\\&", "_AND_", gsub("\\!", "NOT_", cd4_booleanSubsets))))
for(booleanSubsetName in names(cd4_booleanSubsets)) {
# booleanSubset The booleanSubset (a combination of existing gates) in string format, e.g. "8+/GMM+&!8+/GAMMADELTA"
call <- substitute(flowWorkspace::booleanFilter(v), list(v = as.symbol(cd4_booleanSubsets[[booleanSubsetName]])))
g <- eval(call)
suppressWarnings(flowWorkspace::gs_pop_add(gs2, g, parent = "4+", name=booleanSubsetName))
}
cd4_gates_for_dimred <- c(
"/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
"/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
"/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")
Still need to add boolean gates.
gs_pop_add(gs2, eval(substitute(flowWorkspace::booleanFilter(v),
list(v = as.symbol(paste(names(cd4_booleanSubsets), collapse="|"))))),
parent = "/Time/LD-3+/1419-3+/S/Lymph/4+", name = "CD4_COMPASS_Subsets")
## [1] 61
recompute(gs2, "/Time/LD-3+/1419-3+/S/Lymph/4+")
## .........................................................................................................................................................................................................................................................................................................................................................................................done!
cd4_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets"
pop_counts <- pData(gs2) %>%
left_join(gs_pop_get_count_fast(gs2, subpopulations = cd4_compass_subsets_parentGate),
by = c("rowname" = "name")) %>%
dplyr::rename(CD4_COMPASS_Subsets = Count) %>%
dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_COMPASS_Subsets) %>%
dplyr::filter(!(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")) & STIM != "SEB")
Keep in mind that there is lopsided patient and group representation simply due to not sampling:
cd4_compass_subsets_sampleSizes_4plot <- pop_counts %>%
mutate(Cohort = factor(Cohort,
levels = c("Non-hospitalized", "Hospitalized"),
labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_compass_subsets_sampleSizes_4plot,
aes(factor(Cohort), CD4_COMPASS_Subsets)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.15, height = 0) +
theme_bw(base_size=20) +
labs(title="ICS CD4 UMAP patient representation",
y="CD4+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)") +
facet_grid(Batch ~ STIM) +
theme(axis.title.x = element_blank())
# Extract data for dimensionality reduction (not actually sampling)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName) {
# print(sprintf("Sampling data from %s", currentSampleName))
sampleGatingHierarchy(gs2[[currentSampleName]], cd4_compass_subsets_parentGate, n = NULL, otherGates = cd4_gates_for_dimred)
}
cd4_compass_subsets_data <- map_dfr(pop_counts$rowname, call_sampleGatingHierarchy_for_cd4)
dim(cd4_compass_subsets_data)
## [1] 147945 55
knitr::kable(head(cd4_compass_subsets_data))
| name | EXPERIMENT NAME | $DATE | SAMPLE ID | PATIENT ID | STIM | WELL ID | PLATE NAME | filename | rowname | Sample ID | Collection date | Cell count | Cohort | Age | Sex | Race | Hispanic? | Days symptom onset to visit 1 | Pair ID | Race_v2 | Batch | /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets | /Time/LD-3+/1419-3+/S/Lymph/4+/107a | /Time/LD-3+/1419-3+/S/Lymph/4+/154 | /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ | /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ | /Time/LD-3+/1419-3+/S/Lymph/CD38+ | /Time/LD-3+/1419-3+/S/Lymph/HLADR+ | Time | FSC-A | FSC-H | SSC-A | SSC-H | CD8b | TNFa | CD107a | CD154 | CD3 ECD | IL2 | CD4 | IL17a | IL4/5/13 | CD14/CD19 | CCR7 | CD38 | L/D | IFNg | CD45RA | HLADR |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1.502 | 115906.84 | 96585 | 27485.91 | 27497 | 1064.5720 | 1440.4530 | 393.4271 | 648.1245 | 2909.432 | 619.5652 | 1705.126 | 861.0718 | 777.4784 | 1025.7701 | 625.7065 | 631.8912 | 1210.5221 | 965.0119 | 1267.662 | 499.4432 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1.525 | 88823.48 | 78464 | 17823.69 | 16608 | 1045.8657 | 619.7026 | 516.1617 | 1140.8926 | 3061.690 | 651.8842 | 1890.087 | 1941.8019 | 452.2898 | 784.3320 | 2844.8445 | 2105.2791 | 1321.1499 | 592.8734 | 2671.356 | 315.9288 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1.869 | 76075.24 | 61697 | 15965.37 | 15489 | 1093.0385 | 613.5021 | 554.5132 | 790.8342 | 2874.763 | 235.4683 | 1892.790 | 1955.9308 | 589.2085 | 1058.5559 | 2115.3691 | 2029.1339 | 1105.2609 | 319.0016 | 3293.626 | 899.7007 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 2.011 | 50676.04 | 36166 | 13068.27 | 12006 | 337.6944 | 592.8991 | 446.3646 | 816.1756 | 2913.806 | 592.0229 | 1654.902 | 486.0011 | 2992.3889 | 979.4083 | 2385.1418 | 1194.2875 | 716.8176 | 820.3757 | 2494.246 | 561.7028 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 2.129 | 82473.68 | 64294 | 24553.14 | 23922 | 1192.3547 | 196.5198 | 658.7712 | 895.1425 | 2895.531 | 739.8292 | 1828.663 | 1943.6406 | 652.2826 | 266.8253 | 2725.9609 | 954.5881 | 1382.0779 | 331.3962 | 1562.413 | 484.5808 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 2.277 | 95741.76 | 77970 | 31374.81 | 30438 | 404.7302 | 2342.6272 | 421.5775 | 1543.7673 | 2929.278 | 1151.6838 | 1943.653 | 1273.3855 | 859.4286 | 847.0671 | 1992.1693 | 1230.5979 | 1355.7234 | 390.6719 | 1200.687 | 458.7832 |
Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.
cytokine_dominance <- cd4_compass_subsets_data %>%
group_by(Batch) %>%
summarise_at(cd4_cytokine_gates, sum) %>%
t() %>%
as.data.frame() %>%
set_names(c("B1", "B2", "B3")) %>%
rownames_to_column("Cytokine_Gate") %>%
dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
| Cytokine_Gate | B1 | B2 | B3 |
|---|---|---|---|
| /Time/LD-3+/1419-3+/S/Lymph/4+/107a | 10571 | 15787 | 12987 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/154 | 10647 | 12296 | 12535 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | 7346 | 4490 | 5410 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | 10328 | 8292 | 9823 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | 15257 | 1136 | 14258 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | 6356 | 9478 | 10858 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | 11204 | 11027 | 14674 |
cytokine_dominance %>%
pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>%
mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>%
ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
theme_bw(base_size=18) +
geom_bar(stat="identity") +
facet_grid(. ~ Batch) +
labs(title = "CD4 Run Cytokine Dominance by Batch")
cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
"TNFa", "CD107a",
"CD154", "IL2", "IL17a",
"IL4/5/13", "IFNg",
"CCR7", "CD45RA",
"CD38", "HLADR")
cd4.scaled_dimred_input <- cd4_compass_subsets_data %>%
dplyr::select(Batch, all_of(cols_4_dimred)) %>%
group_by(Batch) %>%
nest() %>%
ungroup() %>%
mutate(data = lapply(data, function(df) {as.data.frame(scale(as.matrix(df)))})) %>%
unnest(cols = c(data)) %>%
rename_at(vars(all_of(cols_4_dimred)),function(x) paste0(x,".scaled")) %>%
dplyr::select(-Batch)
cd4_compass_subsets_data <- cbind(cd4_compass_subsets_data, cd4.scaled_dimred_input)
# UMAP can take a long time, so there is a rerun_dimred switch
if(rerun_dimred) {
print("Running UMAP")
set.seed(date)
print(Sys.time())
cd4_compass_subsets_dimred_out <- cd4_compass_subsets_data %>%
# Run CD3, co-receptor, cytokine, memory, and activation markers through UMAP
dplyr::select(all_of(paste0(cols_4_dimred, ".scaled"))) %>%
uwot::umap(spread = 9, min_dist = 0.02, n_threads = 7)
print(Sys.time())
cd4_compass_subsets_w_umap <- cbind(as.data.frame(cd4_compass_subsets_dimred_out) %>%
dplyr::rename(x.umap = V1, y.umap = V2),
cd4_compass_subsets_data)
if(save_output) {
saveRDS(cd4_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
} else {
# Assuming UMAP results are already saved
print("Loading saved UMAP run")
cd4_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
## [1] "Loading saved UMAP run"
Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground
set.seed(date)
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap[sample(nrow(cd4_compass_subsets_w_umap), nrow(cd4_compass_subsets_w_umap)),]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>%
mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
STIM = factor(STIM, levels = c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")))
stim_labs <- c("DMSO", "S1", "S2", "NCAP", "VEMP")
names(stim_labs) <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")
base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
p <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap,
colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
factor(!!as.name(currentColumn))
} else {
as.logical(!!as.name(currentColumn))
})) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=14),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
if(!anyNA(colorScheme)) {
p <- p + scale_color_manual(values = colorScheme)
}
if(currentColumn %in% c("Batch", "SAMPLE ID")) {
p <- p + labs(color = currentColumn) +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(legend.title = element_text(size=15),
legend.text = element_text(size=15),
legend.position = "bottom") +
scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_compass_subsets_w_umap[,currentColumn])))))
} else {
p <- p + theme(legend.position = "none")
}
p
}
base_dimred_plot("Batch")
base_dimred_plot("SAMPLE ID")
Now visualize the cytokine, memory, and activation expression localization
for(cg in cd4_gates_for_dimred) {
print(base_dimred_plot(cg, colorScheme = boolColorScheme) +
labs(title = sprintf("CD4+ COMPASS Subset+ UMAP\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}
table(cd4_compass_subsets_w_umap$cytokine_degree)
##
## 1 2 3 4 5
## 113318 11137 14998 8286 206
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
currentColumn <- "cytokine_degree"
pointSize <- 0.02
ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap, colour=!!as.name(currentColumn))) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
labs(colour="Cytokine\nDegree",
title="CD4+ COMPASS Subset+ UMAP\nColored by Cytokine Polyfunctionality") +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=15),
legend.text = element_text(size=13),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
legend.position = "bottom") +
sc
pointSize <- 0.02
# Theme settings
# Expand axis limits so so contour lines don't get cut off near borders
gg_themes <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap)) +
scale_x_continuous(limits=range(cd4_compass_subsets_w_umap$x.umap)*1.05) +
scale_y_continuous(limits=range(cd4_compass_subsets_w_umap$y.umap)*1.05) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(1, 0, 0, 0, unit = "pt"))
# Hospitalization status contour plots
hosp_contour_plot <- gg_themes +
geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Hospitalized"),
colour="#363636", bins=12) + # "red"
ggtitle("Hosp")
nonhosp_contour_plot <- gg_themes +
geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(Cohort == "Non-hospitalized"),
colour="#363636", bins=12) + # "blue"
ggtitle("Not-Hosp")
# Scaled mfi expression plots for memory, activation, and cytokine markers
mfi_col_text_4plot <- c("TNFa" = "TNF", "CD107a" = "CD107",
"CD154" = "CD154", "IL2" = "IL-2", "IL17a" = "IL17a",
"IL4/5/13" = "IL-4/5/13", "IFNg" = "IFN-γ",
"CCR7" = "CCR7", "CD45RA" = "CD45RA",
"CD38" = "CD38", "HLADR" = "HLA-DR")
plot_scaled_mfi_v2 <- function(currentColumn) {
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc_mfi <- scale_colour_gradientn(colours = myPalette(100), oob=squish, limits=c(-2.5, 2.5))
gg_themes +
geom_point(aes(colour=cd4_compass_subsets_w_umap[,paste0(currentColumn, ".scaled")]),
shape=20, alpha=0.8, size=pointSize) + # !!as.name(paste0(currentColumn, ".scaled")) doesn't work
sc_mfi +
ggtitle(mfi_col_text_4plot[[currentColumn]])
}
unstratified_mfi_plot_cols <- c("TNFa", "CD107a",
"CD154", "IL2", "IL17a",
"IL4/5/13", "IFNg",
"CCR7", "CD45RA",
"CD38", "HLADR")
mfi_plots_unstrat <- lapply(unstratified_mfi_plot_cols, plot_scaled_mfi_v2)
names(mfi_plots_unstrat) <- unstratified_mfi_plot_cols
# Memory UMAP plot colored by gate membership (TEMRA, TEM, TCM, Naive)
getMemoryCategory <- function(CD45RA, CCR7) {
if(CD45RA & CCR7) {
"Naive"
} else if(CD45RA & !CCR7) {
"TEMRA"
} else if(!CD45RA & CCR7) {
"TCM"
} else if(!CD45RA & !CCR7) {
"TEM"
} else {
"Uncategorized"
}
}
cd4_compass_subsets_w_umap$MemoryCategory <- pmap_chr(.l = list(cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+`,
cd4_compass_subsets_w_umap$`/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+`),
.f = getMemoryCategory)
memColorScheme <- c("TEM" = "#fc8d62", "TEMRA" = "#66c2a5", "TCM" = "#e78ac3", "Naive" = "#8da0cb")
# c("TEM" = "#1b9e77", "TEMRA" = "#7570b3", "TCM" = "#e7298a", "Naive" = "#d95f02")
# c("TEM" = "#d7191c", "TEMRA" = "#2c7bb6", "TCM" = "green3", "Naive" = "darkorange", "Uncategorized" = "#90c9dd")
mem_plot_colored_by_gate <- gg_themes +
geom_point(aes(colour=cd4_compass_subsets_w_umap[,"MemoryCategory"]),
shape=20, alpha=0.8, size=pointSize) +
ggtitle("Memory") +
scale_color_manual(values = memColorScheme)
# + theme(legend.position = "right") + guides(colour = guide_legend(override.aes = list(size=9)))
# Activation plots: color by gate membership, one plot per marker
boolColorScheme <- c("0" = "#E2E2E2", "1" = "#363636")
hladr_bool_plot <- gg_themes +
geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/HLADR+"], levels = c(1, 0))),
shape=20, alpha=0.8, size=pointSize) +
scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
ggtitle("HLA-DR")
cd38_bool_plot <- gg_themes +
geom_point(aes(colour=factor(cd4_compass_subsets_w_umap[,"/Time/LD-3+/1419-3+/S/Lymph/CD38+"], levels = c(1, 0))),
shape=20, alpha=0.8, size=pointSize) +
scale_color_manual(values = boolColorScheme, labels=c("1"="Expressed", "0"="Not Expressed")) +
ggtitle("CD38")
# factor_colors = hsv((seq(0, 1, length.out = 4 + 1)[-1] +
# 0.2)%%1, 0.7, 0.95)
#
# stim_colors <- c("Spike 1" = "#fc68be", "Spike 2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a") # heatmap colors
# stim_colors <- c("Spike 1" = "#49F2BF", "Spike 2" = "#F2497C", "NCAP" = "#6B49F2", "VEMP" = "#D0F249", "DMSO" = "#91570a")
stim_abbreviations = c("Spike 1" = "S1", "Spike 2" = "S2", "NCAP" = "NCAP", "VEMP" = "VEMP", "DMSO" = "DMSO")
plot_stim_contour_plot <- function(currentStim) {
gg_themes +
geom_point(shape=20, alpha=0.8, size=pointSize, colour="#E2E2E2") +
geom_density_2d(data=cd4_compass_subsets_w_umap %>% dplyr::filter(STIM == currentStim),
colour="#363636", bins=12) + # stim_colors[[currentStim]]
ggtitle(stim_abbreviations[[currentStim]])
}
stims <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
stim_plots <- lapply(stims, plot_stim_contour_plot)
names(stim_plots) <- stims
# Unstratified PolyF plot
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc_polyf <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))
polyf_plot_unstrat <- gg_themes +
geom_point(aes(colour=cd4_compass_subsets_w_umap[,"cytokine_degree"]), shape=20, alpha=0.8, size=pointSize) +
sc_polyf +
ggtitle("PolyF")
# Extract legends
mfi_legend <- as_ggplot(get_legend(mfi_plots_unstrat$TNFa +
theme(legend.position="bottom",
legend.title.align = 0.5) +
labs(colour="Scaled Expression") +
guides(colour=guide_colorbar(title.position = "top"))))
polyf_legend <- as_ggplot(get_legend(polyf_plot_unstrat +
theme(legend.position="bottom",
legend.title.align = 0.5) +
labs(colour="PolyF") +
guides(colour=guide_colorbar(title.position = "top"))))
mem_legend <- as_ggplot(get_legend(mem_plot_colored_by_gate +
theme(legend.position = "right",
legend.title.align = 0.5) +
guides(colour = guide_legend(override.aes = list(size=9))) +
labs(colour="Memory")))
bool_legend <- as_ggplot(get_legend(hladr_bool_plot +
theme(legend.position = "right",
legend.title.align = 0.5) +
guides(colour = guide_legend(override.aes = list(size=9))) +
labs(colour="Gate Membership")))
Put it all together
print(
# Top row
(hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
# Second row
(mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
# Third row
(hladr_bool_plot | cd38_bool_plot | plot_spacer() |
stim_plots$DMSO | plot_spacer() | plot_spacer() |
mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
)
if(save_output) {
png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.png",
date)),
width=18, height=9, units="in", res=300)
print(
# Top row
(hosp_contour_plot | nonhosp_contour_plot | plot_spacer() |
stim_plots$`Spike 1` | stim_plots$`Spike 2` | plot_spacer() |
polyf_plot_unstrat | plot_spacer() | mfi_plots_unstrat$IFNg) /
# Second row
(mem_plot_colored_by_gate | plot_spacer() | plot_spacer() |
stim_plots$NCAP | stim_plots$VEMP | plot_spacer() |
mfi_plots_unstrat$TNFa | mfi_plots_unstrat$IL2 | mfi_plots_unstrat$`IL4/5/13`) /
# Third row
(hladr_bool_plot | cd38_bool_plot | plot_spacer() |
stim_plots$DMSO | plot_spacer() | plot_spacer() |
mfi_plots_unstrat$IL17a | mfi_plots_unstrat$CD107a | mfi_plots_unstrat$CD154 )
)
dev.off()
png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_MFI_Legend.png",
date)),
width=3, height=1.5, units="in", res=300)
print(mfi_legend)
dev.off()
png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_PolyF_Legend.png",
date)),
width=3, height=1.5, units="in", res=300)
print(polyf_legend)
dev.off()
png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_Memory_Legend.png",
date)),
width=2, height=3, units="in", res=300)
print(mem_legend)
dev.off()
png(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP_GateMembership_Legend.png",
date)),
width=2, height=3, units="in", res=300)
print(bool_legend)
dev.off()
# cairo_pdf(file=here::here(sprintf("out/UMAP/%s_CD4_COMPASS_Subsets_UMAP.cairo.pdf",
# date)),
# width=12, height=9, onefile = TRUE, bg = "transparent", family = "Arial")
# print(x)
# dev.off()
}
## png
## 2